/*

This code cleans MECS and collapses to industry x year fuel coef averages

*/

clear all
local dataRAW "/data/economic/"
local dataSTATA "/projects/data/dataSTATA"


*>----------------- loop over and append all MECS files ----------------<

local i = 0
foreach y of numlist 1985 1988 1991 1994 1998 2002 2006 2010 2014 {
	import sas using "/data/economic/mec/`y'/mec`y'.sas7bdat", case(lower) clear
	if !inlist(`y',1991,1994,2006,2010,2014) {
		destring year, replace
		replace year = `y'
	}
	if inlist(`y',1991,2006,2010,2014) {
		gen year = `y'
	}
	if `y' == 1994 {
		ren st fipsst
		gen year = `y'
		gen lbdnum =.
		drop e10
		ren qu10 q10
		ren eu10 e10
	}
	
	if inlist(`y', 2006, 2010) {
		ren fipst fipsst
	}
	if `y' == 1985 {
		ren stfips fipsst
		foreach var in qcf e {
			foreach num of numlist 34 35 36 37 38 78 79 83 84 {
				gen `var'`num'=.
			}
		}
		gen e28 =.
		gen q28 =.
		drop q10 e10
		ren qu10 q10
		ren eu10 e10
	}	
	if `y'== 1988 {
		ren stfips fipsst
		foreach var in qcf e {
			foreach num of numlist 28 34 35 36 37 38 78 79 83 84 {
				gen `var'`num'=.
			}
		}
	}	
	if `y' == 1991 {
		ren st fipsst
		foreach var in qcf e {
			gen `var'34 =.
			gen `var'35 =.
			gen `var'83 =.
			gen `var'84 =.
		}
		drop q10 e10
		ren qu10 q10
		ren eu10 e10
	}	
	if inlist(`y',1998,2002,2006,2010) {
		gen e20=.
	}
	if `y' == 2014 {
		ren rptfipst fipsst
		foreach var in qcf e {
			foreach num of numlist 20 22 24 47 70 {
				gen `var'`num'=.
			}
		}
		gen q22 =.
	}

	
	* identify quantities and expenditures 
	foreach var in qcf e {
		foreach num of numlist 20 21 22 23 24 27 28 30 34 35 36 37 38 90 40 41 42 43 44 47 60 61 62 63 64 70 71 72 78 79 83 84 {
			local varname `var'`num'
			destring `varname', replace
			replace `varname' = 0 if mi(`varname')
		}
	}
	
	foreach var in q e {
		foreach num of numlist 10 21 22 {
			local varname `var'`num'
			destring `varname', replace
			replace `varname' = 0 if mi(`varname')
		}
	}
	
	
	* Electricity
	ren q10 elec_q
	la var elec_q "Electricity Consumed (1000 kwh)"
	ren e10 elec_e
	la var elec_e "Electricity Expenditures (1000 USD)"
	
	
	* Crude Oil
	ren qcf20 crude_oil_q
	la var crude_oil_q "Quantity Crude Oil Consumed (bbl)"
	ren e20 crude_oil_e
	la var crude_oil_e "Crude Oil Expenditures (1000 USD)"

	
	* Residual Fuel Oil
	gen resid_fuel_oil_q = q21 // combine fuel and non-fuel consumption (e.g., feedstocks for refining)
	la var resid_fuel_oil_q "Residual Fuel Oil Consumed (bbl)"
	ren e21 resid_fuel_oil_e
	la var resid_fuel_oil_e "Residual Fuel Oil Expenditures (1000 USD)"
	
	* Dist Fuel Oil
	gen dist_fuel_oil_q = q22 // combine fuel and non-fuel consumption (e.g., feedstocks for refining)
	la var dist_fuel_oil_q "Dist Fuel Oil Consumed (bbl)"
	ren e22 dist_fuel_oil_e
	la var dist_fuel_oil_e "Dist Fuel Oil Expenditures (1000 USD)"
	
	* Gasoline
	ren qcf23 gas_q
	la var gas_q "Gasoline Consumed (gal)"
	ren e23 gas_e
	la var gas_e "Gasoline Expenditures (1000 USD)"
	
	* LPG 
	ren qcf24 lpg_q
	la var lpg_q "LPG Consumed (gal)"
	ren e24 lpg_e
	la var lpg_e "LPG Expenditures (1000 USD)"
	
	* Kerosene
	ren qcf27 kerosene_q
	la var kerosene_q "Kerosene Consumed (bbl)"
	ren e27 kerosene_e
	la var kerosene_e "Kerosene Expenditures (1000 USD)"
	
	* Diesel
	ren qcf28 diesel_q
	la var diesel_q "Diesel Consumed (bbl)"
	ren e28 diesel_e
	la var diesel_e "Diesel Expenditures (1000 USD)"
	
	* Natural Gas
	ren qcf30 nat_gas_q
	la var nat_gas_q "Natural Gas Consumed (1000 cu ft)"
	ren e30 nat_gas_e
	la var nat_gas_e "Natural Gas Expenditures (1000 USD)"
	
	* Mixed Gases
	gen mix_gas_q = qcf34 + qcf35
	la var mix_gas_q "Mixed Ethane, Butane, Propane Consumed (gal)"
	gen mix_gas_e = e34 + e35
	la var mix_gas_e "Mixed Ethane, Butane, Proprane Expenditures (1000 USD)"
	
	* Butane
	ren qcf36 butane_q
	la var butane_q "Butane Consumed (gal)"
	ren e36 butane_e
	la var butane_e "Butane Expenditures (1000 USD)"
	
	* Ethane
	ren qcf37 ethane_q
	la var ethane_q "Ethane Consumed (gal)"
	ren e37 ethane_e
	la var ethane_e "Ethane Expenditures (1000 USD)"
	
	* Propane
	ren qcf38 propane_q
	la var propane_q "Propane Consumed (gal)"
	ren e38 propane_e
	la var propane_e "Propane Expenditures (1000 USD)"
	
	* Biomass
	gen biomass_q = qcf90 + qcf83 + qcf84
	la var biomass_q "Biomass Consumed (million BTU)"
	gen biomass_e = e90 + e83 + e84
	la var biomass_e "Biomass Expenditures (1000 USD)"
	
	* Coal (different types)
	ren qcf40 coal_anth_q
	la var coal_anth_q "Anthracite Coal Consumed (short tons)"
	ren e40 coal_anth_e
	la var coal_anth_e "Anthracite Coal Expenditures (1000 USD)"
	
	ren qcf41 coal_bitsub_q
	la var coal_bitsub_q "Bittuminous and Sub. Coal Consumed (short tons)"
	ren e41 coal_bitsub_e
	la var coal_bitsub_e "Bittuminous and Sub. Coal Expenditures (1000 USD)"
	
	ren qcf42 coal_lignite_q
	la var coal_lignite_q "Lignite Coal Consumed (short tons)"
	ren e42 coal_lignite_e
	la var coal_lignite_e "Lignite Coal Expenditures (1000 USD)"
	
	ren qcf43 coal_coke_q
	la var coal_coke_q "Coal Coke Consumed (short tons)"
	ren e43 coal_coke_e
	la var coal_coke_e "Coal Coke Expenditures (1000 USD)"
	
	gen coal_breeze_q = qcf44 + qcf47
	la var coal_breeze_q "Coal Breeze Consumed (short tons)"
	gen coal_breeze_e = e44 + e47
	la var coal_breeze_e "Coal Breeze Expensitures (1000 USD)"
	
	* Misc Fuels
	ren qcf60 bf_gas_q
	la var bf_gas_q "Blast Furnace Gas Consumed (million BTU)"
	ren e60 bf_gas_e
	la var bf_gas_e "Blast Furnace Gas Expenditures (1000 USD)"
	
	ren qcf61 coke_gas_q
	la var coke_gas_q "Coke Oven Gas Consumed (million BTU)"
	ren e61 coke_gas_e
	la var coke_gas_e "Coke Oven Gas Expenditures (1000 USD)"
	
	ren qcf62 waste_gas_q
	la var waste_gas_q "Waste and Byproduct Gas Consumed (million BTU)"
	ren e62 waste_gas_e
	la var waste_gas_e "Waste and Byproduct Gas Expenditures (1000 USD)"
	
	ren qcf63 hydrogen_q
	la var hydrogen_q "Hydrogen Gas Consumed (million BTU)"
	ren e63 hydrogen_e
	la var hydrogen_e "Hydrogen Gas Expenditures (1000 USD)"
	
	ren qcf64 acetylene_q
	la var acetylene_q "Acetylene Consumed (cu ft)"
	ren e64 acetylene_e
	la var acetylene_e "Acetylene Expenditures (1000 USD)"
	
	gen petro_q = qcf70 + qcf78 + qcf79
	la var petro_q "Petroleum Coke Consumed (bbl)"
	gen petro_e = e70 + e78 + e79
	la var petro_e "Petroleum Coke Expenditures (1000 USD)"
	
	gen waste_other_q = qcf71 + qcf72
	la var waste_other_q "Waste Oil, Tars, and Other Materials Consumed (million BTU)"
	gen waste_other_e = e71 + e72
	la var waste_other_e "Waste Oil, Tars, and Other Materials Expenditures (1000 USD)"
	
	* indentify plants that only use electricity 
	if inlist(`y',1985,1988,1991,1994,2014) {
		gen elec_only =.
	}
	if inlist(`y',1998,2002,2006,2010) {
		gen elec_only = 0
		replace elec_only = 1 if eleconly10 == 1
	}
	la var elec_only "Equals 1 if Only Source of Energy is Purchased Electricity"
	
	* clean industry codes 
	* concord all industries to NAICS 1997
	if `y' < 1987 {
		destring sic4, replace
		preserve
			import delimited "/projects/dstafftransfer/transfer.20200428/conc_sic72_sic87.csv", clear
			ren sic72 sic4
			destring sic4, replace
			destring sic87, replace
			duplicates drop sic4, force
			tempfile concord72`y'
			sa `concord72`y'', replace
		restore
		merge m:1 sic4 using `concord72`y'', assert(1 2 3) keep(1 3) keepusing(sic4 sic87) nogen
		gen bestsic = sic4
		replace bestsic = sic87 if !mi(sic87)
	}
	if inlist(`y', 1988, 1991)  {
		destring sic4, replace
		ren sic4 bestsic
	}
	* now all industry codes are in sic87 so merge all sic87 with naics97
	* naics is saved in different formats each year
	if inlist(`y', 1985, 1988, 1991) {
		preserve
			import delimited "/projects/dstafftransfer/transfer.20200428/conc_sic87_naics97.csv", clear
			ren sic bestsic
			duplicates drop bestsic, force
			tempfile concord87`y'
			sa `concord87`y'', replace
		restore
		merge m:1 bestsic using `concord87`y'', assert(1 2 3) keep(1 3) keepusing(bestsic naics97) nogen
		la var bestsic "SIC 1987 Rev., 4-digit"
		ren naics97 bestnaics
		destring bestnaics, replace
	}	

	if `y' == 1994 {
		ren stratum bestsic
		destring bestsic, replace
		preserve
			import delimited "/projects/dstafftransfer/transfer.20200428/conc_sic87_naics97.csv", clear
			ren sic bestsic
			duplicates drop bestsic, force
			tempfile concord87`y'
			sa `concord87`y'', replace
		restore
		merge m:1 bestsic using `concord87`y'', assert(1 2 3) keep(1 3) keepusing(bestsic naics97) nogen
		la var bestsic "SIC 1987 Rev., 4-digit"
		ren naics97 bestnaics
		destring bestnaics, replace
		
		preserve
			import delimited "/projects/dstafftransfer/transfer.20200428/conc_sic87_naics97.csv", clear
			ren sic bestsic
			gen bestsic2 = floor(bestsic / 100)
			duplicates drop bestsic2, force
			tempfile concord87`y'
			sa `concord87`y'', replace
		restore
		gen bestsic2 = floor(bestsic / 100)
		merge m:1 bestsic2 using `concord87`y'', assert(1 2 3) keep(1 3) keepusing(bestsic2 naics97) nogen
		destring naics97, replace
		replace bestnaics = naics97 if mi(bestnaics)
		drop naics97
		
	}
	if inlist(`y', 1998, 2002) {
		ren nind bestnaics
		destring bestnaics, replace
	}	
	if `y' > 2002 & `y' < 2014 {
		destring naics, replace
		ren naics bestnaics
		ren wgt mecswt
	}	
	if `y' == 2014 {
		ren smpwgt mecswt
		gen bestnaics = substr(smpnaics,1,6)
		destring bestnaics, replace		
	}
	
	* exclude non-manuf industries
	drop if bestnaics > 399999
	
	if inlist(`y', 1991, 1994){
		rename cfn id
	}
	
	*only keep variables of interest
	keep lbdrevlnk id year bestnaics *_q *_e mecswt elec_only fipsst
	la var mecswt "MECS Survey Weights"
	destring fipsst, replace
	
	sum		
		
	if `i' == 1 append using "/projects/data/dataSTATA/economic/mecs_merged.dta"
	local i = 1
	sa "/projects/data/dataSTATA/economic/mecs_merged.dta", replace
	
}

*>-------------------- deflate expenditures -----------------------------------<

import delimited "/projects/dstafftransfer/transfer.20200428/naics5811.csv", clear 
* relevant deflators:
foreach var of varlist piship pimat pien piinv {
	gen `var'_n = `var' if year == 2011
	bys naics: egen `var'_2011 = mean(`var'_n)
	gen `var'_dfltr = `var'_2011 / `var'
	drop `var'_n `var'_2011 
	
}
keep naics year pien_dfltr
ren naics bestnaics

tempfile deflators
sa `deflators', replace

use "/projects/data/dataSTATA/economic/mecs_merged.dta", clear
merge m:1 bestnaics year using `deflators', assert(1 2 3) keep(1 3) nogen
foreach var of varlist *_e {
	replace `var' = `var' * pien_dfltr 
}
* correct change of units for electricity consumed in kWh 
replace elec_q = elec_q / 1000 if inlist(year, 2006, 2010, 2014)	



*>--------------- convert quantities to BTU -----------------------------------<
* conversion factors referenced to co2_vol_mass_updated.xls from EIA
* except CO2 for crude oil: https://www.epa.gov/energy/greenhouse-gases-equivalencies-calculator-calculations/
* except CO2 biomass, blast furnace gas, coke oven gas, waste gas, acetylene waste oil: EPA -- emissions factors for greenhouse gas emissions pdf 2014
* except CO2 for electricity, which is from eGRID 2014 (link to xls file in Dropbox)
* Conversions performed in secondary do file for disclosure purposes

do /projects/programs/codeSTATA/A_cleaning/2_cleanmecs_unitsconversions.do 
	
	
* industry-average BTU per $ raw fuel expenditures
	egen btu_tot = rowtotal( *_btu)
	la var btu_tot "Total Fuel Consumed (million BTU), incl. Electricity"
	egen cf_tot = rowtotal( *_e)
	la var cf_tot "Total Fuels Expenditures (1000 2011 USD), incl. Electricity"

	gen btu_cf = (btu_tot / ( cf_tot  * 1000) )
	la var btu_cf "Energy Fuel Intensity (million BTU per USD Fuel Expenditure, 2011)"
	egen co2_tot = rowtotal( *_co2)
	la var co2_tot "Total CO2 Consumed (kg)"
	gen co2_cf = (co2_tot / (cf_tot * 1000) )
	la var co2_cf "CO2 Fuel Intensity (kg per USD Fuel Expenditure, 2011)"
	
	
	gen elec_btu_cf = elec_btu / (elec_e * 1000)
	la var elec_btu_cf "Electricity Fuel Intensity (million BTU per USD Electricity Expenditure, 2011)"
	gen elec_co2_cf = elec_co2 / (elec_e * 1000)
	la var elec_co2_cf "CO2 Electricity Intensity (kg per USD Electricity Expenditure, 2011)"
	
	gen btu_raw = btu_tot - elec_btu
	la var btu_raw "Total Raw Fuel Consumed (million BTU), excl. Electricity"
	gen co2_raw = co2_tot - elec_co2
	la var co2_raw "Total Raw Fuel Consumed (kg CO2), excl. Electricity"
	gen cf_raw  = cf_tot  - elec_e
	la var cf_raw "Total Raw Fuels Expenditures (1000 2011 USD, excl. Electricity)"
	
	gen btu_cf_raw = btu_raw / (cf_raw * 1000)
	la var btu_cf_raw "Energy Raw Fuel Intensity (million BTU per USD Fuel Expenditure, 2011), excl. Elec."
	gen co2_cf_raw = co2_raw / (cf_raw * 1000)
	la var co2_cf_raw "CO2 Raw Fuel Intensity (kg per USD Fuel Expenditure, 2011), excl. Elec."
	
	
sum btu_cf, detail
drop if btu_cf > `r(p99)'
sum btu_cf, detail
drop if btu_cf < `r(p1)'

sa "/projects/data/dataSTATA/economic/mecs_merged.dta", replace


collapse (mean) co2_cf_raw  btu_cf_raw [pw=mecswt], by(bestnaics year)


la var btu_cf_raw "Industry-Avg Energy Raw Fuel Intensity (million BTU per USD Fuel Expenditure, 2011), excl. Elec."
la var co2_cf_raw "Industry-Avg CO2 Raw Fuel Intensity (kg per USD Fuel Expenditure, 2011), excl. Elec."

sa "/projects/data/dataSTATA/economic/mecs_merged_collapsed.dta", replace

